# Load the cleaned arrest records (one row per arrest charge).
Arrests <- read_csv(file = "Police_Arrests_Clean.csv")
## Rows: 37311 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Charge, Street, City, State, Race, Gender, Ethnicity, Arrest_Type...
## dbl (6): Incident_Id, Zip, Age, latitude, longitude, Object_Id
## dttm (1): Arrest_Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the cleaned incident reports. readr reports parsing issues for this
# file; inspect them with problems(Incidents) before trusting affected columns.
Incidents <- read_csv(file = "Police_Incidents_Clean.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 109004 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Agency, Offense, Street, City, State, Reported.As, Premise, Forci...
## dbl (6): Incident_Id, Zip, Victim_Age, Latitude, Longitude, Object_Id
## dttm (3): Report_Date, Occur_Date, Found_Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Data Inspection
# Inspect column names, types, and the first few values of the arrest data
glimpse(x = Arrests)
## Rows: 37,311
## Columns: 18
## $ Incident_Id <dbl> 24988, 26403, 24155, 34658, 25254, 26805, 28379, 27807, …
## $ Charge <chr> "SEX OFFENSE-2ND DEG", "2ND DEGREE TRESPASS", "COMMON LA…
## $ Street <chr> "200 GARDNER CIR", "224 KNOLLS ST", "128 JOHNSON STREET"…
## $ City <chr> "CHAPEL HILL", "CHAPEL HILL", "CHAPEL HILL", "CHAPEL HIL…
## $ State <chr> "NC", "NC", "NC", "NC", "NC", "NC", "NC", "NC", "NC", "N…
## $ Zip <dbl> 27516, 27516, 27516, 27516, 27516, 27516, 27516, 27516, …
## $ Arrest_Date <dttm> 2015-07-05 22:13:00, 2011-03-12 23:01:00, 2014-07-24 03…
## $ Age <dbl> 38, 56, 27, 59, 19, 44, 26, 25, 29, 19, 49, 24, 18, 19, …
## $ Race <chr> "W", "B", "B", "W", "W", "B", "B", "B", "B", "B", "B", "…
## $ Gender <chr> "M", "M", "M", "F", "F", "F", "M", "M", "M", "F", "M", "…
## $ Ethnicity <chr> "H", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", N…
## $ Arrest_Type <chr> "ON VIEW", "SUMMONED/CITED", "TAKEN INTO CUSTODY (WARRAN…
## $ Drugs_Alcohol <chr> "Y", "U", "U", "Y", "Y", "N", "N", "Y", "Y", "N", "Y", "…
## $ Weapon <chr> "UNARMED", "UNARMED", "UNARMED", "UNARMED", "UNARMED", "…
## $ Disposition <chr> "CLEARED BY ARREST", "CLEARED BY ARREST", "CLEARED BY AR…
## $ latitude <dbl> 35.89088, 35.90571, 35.90513, 35.92202, 35.93296, 35.947…
## $ longitude <dbl> -79.07104, -79.06835, -79.06875, -79.07185, -79.06871, -…
## $ Object_Id <dbl> 1, 3, 4, 5, 8, 9, 10, 12, 14, 15, 17, 18, 19, 20, 22, 23…
# Inspect column names, types, and the first few values of the incident data
glimpse(x = Incidents)
## Rows: 109,004
## Columns: 20
## $ Incident_Id <dbl> 74984, 74487, 74299, 75136, 74985, 74445, 75130, 74990, …
## $ Agency <chr> "CHPD", "CHPD", "CHPD", "CHPD", "CHPD", "CHPD", "CHPD", …
## $ Offense <chr> "TRESPASSING", "DOMESTIC DISTURBANCE/NO ASSAULT", "DOMES…
## $ Street <chr> "1301 FORDHAM BLVD", "103 PINEGATE CIR", "377 S ESTES DR…
## $ City <chr> "CHAPEL HILL", "CHAPEL HILL", "CHAPEL HILL", "CHAPEL HIL…
## $ State <chr> "NC", "NC", "NC", "NC", "NC", "NC", "NC", "NC", "NC", "N…
## $ Zip <dbl> 27517, 27514, 27517, 27517, 27514, 27516, 27516, 27514, …
## $ Report_Date <dttm> 2010-02-19 00:54:00, 2010-01-20 00:00:00, 2010-01-10 11…
## $ Occur_Date <dttm> 2010-02-19 00:53:00, 2010-01-19 23:59:00, 2010-01-10 11…
## $ Found_Date <dttm> 2010-02-19 00:54:00, 2010-01-20 00:00:00, 2010-01-10 11…
## $ Reported.As <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ Premise <chr> "HOTEL/MOTEL", "HOME OF VICTIM - OTHER DWELLING", "HOME …
## $ Forcible <chr> "N", "N", "Y", "N", "Y", "N", "Y", "N", "N", "N", NA, "N…
## $ Weapon <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ Victim_Age <dbl> NA, 52, 35, 33, 30, NA, 43, NA, 21, 66, 76, NA, 22, NA, …
## $ Victim_Race <chr> NA, "B", "B", "W", "W", "W", "W", NA, "I", "W", "W", NA,…
## $ Victim_Gender <chr> NA, "F", "F", "F", "M", "M", "F", NA, "M", "F", "F", NA,…
## $ Latitude <dbl> 35.93626, 35.94832, 35.92433, 35.94095, 35.91457, 35.905…
## $ Longitude <dbl> -79.02344, -79.00841, -79.02229, -79.00953, -79.05288, -…
## $ Object_Id <dbl> 1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19,…
# Column names of the arrest data
colnames(Arrests)
## [1] "Incident_Id" "Charge" "Street" "City"
## [5] "State" "Zip" "Arrest_Date" "Age"
## [9] "Race" "Gender" "Ethnicity" "Arrest_Type"
## [13] "Drugs_Alcohol" "Weapon" "Disposition" "latitude"
## [17] "longitude" "Object_Id"
# Column names of the incident data
colnames(Incidents)
## [1] "Incident_Id" "Agency" "Offense" "Street"
## [5] "City" "State" "Zip" "Report_Date"
## [9] "Occur_Date" "Found_Date" "Reported.As" "Premise"
## [13] "Forcible" "Weapon" "Victim_Age" "Victim_Race"
## [17] "Victim_Gender" "Latitude" "Longitude" "Object_Id"
# Preview the first six arrest records
head(Arrests, n = 6)
## # A tibble: 6 × 18
## Incident_Id Charge Street City State Zip Arrest_Date Age Race
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dttm> <dbl> <chr>
## 1 24988 SEX OFFE… 200 G… CHAP… NC 27516 2015-07-05 22:13:00 38 W
## 2 26403 2ND DEGR… 224 K… CHAP… NC 27516 2011-03-12 23:01:00 56 B
## 3 24155 COMMON L… 128 J… CHAP… NC 27516 2014-07-24 03:41:00 27 B
## 4 34658 POSS COC… 300 S… CHAP… NC 27516 2019-06-18 14:55:00 59 W
## 5 25254 PROVISIO… 119 N… CHAP… NC 27516 2017-12-21 04:40:00 19 W
## 6 26805 FAIL TO … 100 C… CHAP… NC 27516 2015-10-10 16:11:00 44 B
## # ℹ 9 more variables: Gender <chr>, Ethnicity <chr>, Arrest_Type <chr>,
## # Drugs_Alcohol <chr>, Weapon <chr>, Disposition <chr>, latitude <dbl>,
## # longitude <dbl>, Object_Id <dbl>
# Preview the first six incident records
head(Incidents, n = 6)
## # A tibble: 6 × 20
## Incident_Id Agency Offense Street City State Zip Report_Date
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dttm>
## 1 74984 CHPD TRESPASSING 1301 … CHAP… NC 27517 2010-02-19 00:54:00
## 2 74487 CHPD DOMESTIC DIST… 103 P… CHAP… NC 27514 2010-01-20 00:00:00
## 3 74299 CHPD DOMESTIC SIMP… 377 S… CHAP… NC 27517 2010-01-10 11:30:00
## 4 75136 CHPD B&E RESIDENCE… 230 S… CHAP… NC 27517 2010-02-26 14:16:00
## 5 74985 CHPD SIMPLE ASSAULT 201 E… CHAP… NC 27514 2010-02-19 01:29:00
## 6 74445 CHPD B&E RESIDENCE… 515 S… CHAP… NC 27516 2010-01-17 18:30:00
## # ℹ 12 more variables: Occur_Date <dttm>, Found_Date <dttm>, Reported.As <chr>,
## # Premise <chr>, Forcible <chr>, Weapon <chr>, Victim_Age <dbl>,
## # Victim_Race <chr>, Victim_Gender <chr>, Latitude <dbl>, Longitude <dbl>,
## # Object_Id <dbl>
Convert dates
# Derive calendar features from the arrest timestamp:
#   ArrestDate - calendar date of Arrest_Date
#   year       - numeric year
#   month_num  - numeric month (1-12)
#   month      - labelled month (ordered factor)
#   season     - meteorological season; stays NA when the date is missing
Arrests <- mutate(
  Arrests,
  ArrestDate = as.Date(Arrest_Date),
  year = year(ArrestDate),
  month_num = month(ArrestDate),
  month = month(ArrestDate, label = TRUE),
  season = case_when(
    month_num %in% 3:5 ~ "Spring",
    month_num %in% 6:8 ~ "Summer",
    month_num %in% 9:11 ~ "Fall",
    month_num %in% c(12, 1, 2) ~ "Winter"
  )
)
# Derive the same calendar features for incidents, based on when the incident
# occurred (Occur_Date) rather than when it was reported or found.
Incidents <- mutate(
  Incidents,
  IncidentDate = as.Date(Occur_Date),
  year = year(IncidentDate),
  month_num = month(IncidentDate),
  month = month(IncidentDate, label = TRUE),
  season = case_when(
    month_num %in% 3:5 ~ "Spring",
    month_num %in% 6:8 ~ "Summer",
    month_num %in% 9:11 ~ "Fall",
    month_num %in% c(12, 1, 2) ~ "Winter"
  )
)
# Confirm the derived date columns were appended to the arrest data
head(Arrests, n = 6)
## # A tibble: 6 × 23
## Incident_Id Charge Street City State Zip Arrest_Date Age Race
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dttm> <dbl> <chr>
## 1 24988 SEX OFFE… 200 G… CHAP… NC 27516 2015-07-05 22:13:00 38 W
## 2 26403 2ND DEGR… 224 K… CHAP… NC 27516 2011-03-12 23:01:00 56 B
## 3 24155 COMMON L… 128 J… CHAP… NC 27516 2014-07-24 03:41:00 27 B
## 4 34658 POSS COC… 300 S… CHAP… NC 27516 2019-06-18 14:55:00 59 W
## 5 25254 PROVISIO… 119 N… CHAP… NC 27516 2017-12-21 04:40:00 19 W
## 6 26805 FAIL TO … 100 C… CHAP… NC 27516 2015-10-10 16:11:00 44 B
## # ℹ 14 more variables: Gender <chr>, Ethnicity <chr>, Arrest_Type <chr>,
## # Drugs_Alcohol <chr>, Weapon <chr>, Disposition <chr>, latitude <dbl>,
## # longitude <dbl>, Object_Id <dbl>, ArrestDate <date>, year <dbl>,
## # month_num <dbl>, month <ord>, season <chr>
# Confirm the derived date columns were appended to the incident data
head(Incidents, n = 6)
## # A tibble: 6 × 25
## Incident_Id Agency Offense Street City State Zip Report_Date
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dttm>
## 1 74984 CHPD TRESPASSING 1301 … CHAP… NC 27517 2010-02-19 00:54:00
## 2 74487 CHPD DOMESTIC DIST… 103 P… CHAP… NC 27514 2010-01-20 00:00:00
## 3 74299 CHPD DOMESTIC SIMP… 377 S… CHAP… NC 27517 2010-01-10 11:30:00
## 4 75136 CHPD B&E RESIDENCE… 230 S… CHAP… NC 27517 2010-02-26 14:16:00
## 5 74985 CHPD SIMPLE ASSAULT 201 E… CHAP… NC 27514 2010-02-19 01:29:00
## 6 74445 CHPD B&E RESIDENCE… 515 S… CHAP… NC 27516 2010-01-17 18:30:00
## # ℹ 17 more variables: Occur_Date <dttm>, Found_Date <dttm>, Reported.As <chr>,
## # Premise <chr>, Forcible <chr>, Weapon <chr>, Victim_Age <dbl>,
## # Victim_Race <chr>, Victim_Gender <chr>, Latitude <dbl>, Longitude <dbl>,
## # Object_Id <dbl>, IncidentDate <date>, year <dbl>, month_num <dbl>,
## # month <ord>, season <chr>
# Line chart of total arrests per year. Rows with a missing year are still
# counted, so ggplot drops one NA point and emits a warning.
ggplot(
  data = count(Arrests, year),
  mapping = aes(x = year, y = n)
) +
  geom_line() +
  geom_point() +
  labs(title = "Yearly Arrests", x = "Year", y = "Number of Arrests") +
  theme_minimal()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
Observations:
- Arrests start high in ~2010, with counts over 3,500 per year.
- Arrest numbers remain relatively stable between ~2010 and 2016, fluctuating slightly between ~3,200–3,600.
- Beginning around 2017–2018, there is a noticeable decline in arrests.
- The drop becomes much sharper around 2019–2021, falling to nearly half of earlier levels. This likely corresponds with the COVID-19 pandemic and lockdowns, when many cities saw decreased police activity and fewer arrests. The 2021 total is also understated because arrest data from April to September 2021 are missing.
- After 2020, there’s a small rebound, but the counts remain significantly lower than pre-2017 levels.
# Line chart of arrests per calendar month across the whole period.
# Rows with a missing year/month produce an NA date, which ggplot drops with a
# warning.
Arrests %>%
  count(year, month_num) %>%
  # Represent each (year, month) pair by the first day of that month
  mutate(YearMonth = as.Date(paste(year, month_num, "01", sep = "-"))) %>%
  ggplot(aes(x = YearMonth, y = n)) +
  geom_line() +
  labs(
    title = "Monthly Arrests Over Time",
    x = "Year-Month", y = "Number of Arrests"
  ) +
  scale_x_date(date_breaks = "6 months", date_labels = "%Y-%m") +
  # theme_minimal() is a complete theme and resets every theme element, so it
  # must come BEFORE the axis-label tweak or the 45-degree rotation is lost.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
Observations:
2010–2016:
Arrests fluctuate between ~250–400 per month, with periodic peaks (approaching ~500) and valleys.
There appears to be some seasonality: higher counts in some mid-year (likely summer) months, lower in others (likely winter).
2017–2018:
Monthly counts start to decline gradually compared to earlier years. The peaks are less pronounced, staying mostly below 350.
2019–2020:
Sharp decline begins late 2019 and early 2020 — arrests drop rapidly to below 200 per month.
This drop aligns with the COVID-19 pandemic and lockdowns, when many jurisdictions reduced arrests.
2021–2024:
Counts remain low (~100–150 per month) compared to pre-2019 levels.
There are occasional small spikes, but they stay much lower than earlier years, suggesting a lasting reduction.
- Stable high activity (2010–2016): Monthly arrests consistently in the 300–400+ range.
- Gradual decline (2017–2018): Decrease starts before COVID.
- Sharp drop (2019–2020): Likely due to pandemic-related restrictions and changes in enforcement.
- Sustained lower levels (2021 onward): Arrests remain at about half or less of their previous levels — indicating potential long-term shifts.
HEAT MAP
# Tile heatmap of arrest counts with month on the x-axis and year on the
# y-axis. The fill runs from skyblue (fewest arrests) to darkblue (most).
# NOTE(review): the written observations for this figure describe green/yellow
# hues for high counts and dark blue for low counts, which does not match this
# gradient — confirm which palette produced the figure being discussed.
ggplot(
  data = count(Arrests, year, month),
  mapping = aes(x = month, y = factor(year), fill = n)
) +
  geom_tile() +
  scale_fill_gradient(low = "skyblue", high = "darkblue") +
  labs(
    title = "Heatmap of Arrests by Month and Year",
    x = "Month",
    y = "Year",
    fill = "Arrests"
  ) +
  theme_minimal()
Observations:
2010–2016:
Arrest levels are high overall (green to yellow hues) with a clear concentration in summer months (June–August) — particularly July & August, which show the most intense (yellow) spots.
Winter months (January–February, December) consistently show lower levels (blueish shades), which is expected due to seasonality.
2017–2018:
Arrests begin to decline slightly — the heatmap colors shift towards cooler greens and blues even in summer months, suggesting fewer arrests compared to earlier years.
2019–2020:
A dramatic drop appears starting in 2020 — most months, including summer, turn dark blue indicating very low arrest counts.
The drop is most pronounced in spring and summer 2020 — coinciding with COVID-19 restrictions.
2021–2024:
Arrest levels remain low (blue to dark blue), and the distinct summer peaks almost disappear.
The seasonality seems flattened — suggesting the usual summer increase has weakened post-pandemic.
Seasonal pattern:
Consistent before 2017:
- Peaks: June–August
- Lows: January–February & December
Post-2020, this pattern weakens.
Boxplot: Distribution of arrests by month
# Arrest counts per (year, month), tagged with the meteorological season.
# Rows whose month is missing get no season and are dropped.
monthly_counts <- count(Arrests, year, month) %>%
  mutate(
    # position of the ordered month factor: Jan = 1, ..., Dec = 12
    month_num = as.numeric(month),
    season = case_when(
      month_num %in% 3:5 ~ "Spring",
      month_num %in% 6:8 ~ "Summer",
      month_num %in% 9:11 ~ "Fall",
      month_num %in% c(12, 1, 2) ~ "Winter"
    )
  ) %>%
  filter(!is.na(season)) %>%
  # factor() only creates levels for values that are present, so no unused
  # levels remain to be dropped afterwards
  mutate(season = factor(season))
# Fixed colour for each season, shared by the seasonal plots
season_colors <- c(
  Winter = "skyblue3",
  Spring = "springgreen3",
  Summer = "indianred",
  Fall = "gold"
)
# Base boxplot of monthly arrest counts, one box per month, coloured by season.
# The subtitle contains a {current_frame} placeholder that is filled in per
# frame once the plot is animated.
p <- ggplot(
  data = monthly_counts,
  mapping = aes(
    x = month,
    y = n,
    fill = season,
    color = season,
    group = month
  )
) +
  geom_boxplot() +
  scale_y_continuous(breaks = seq(0, 500, 50)) +
  scale_fill_manual(values = season_colors) +
  scale_color_manual(values = season_colors) +
  labs(
    title = "Distribution of Monthly Arrests by Month",
    subtitle = "Year: {current_frame}",
    x = "Month",
    y = "Number of Arrests",
    fill = "Season",
    color = "Season"
  ) +
  theme_minimal(base_size = 14) +
  theme(plot.subtitle = element_text(size = 18, face = "bold", hjust = 0.5))
# Turn the boxplot into an animation with one frame per year
anim <- p + transition_manual(frames = year)
# Rendering and saving are left disabled; uncomment to regenerate the GIF:
# animate(anim, width = 900, height = 600, fps = 1, duration = length(unique(monthly_counts$year)) * 4, renderer = gifski_renderer())
# anim_save("animated_boxplot_monthly_arrests_synced.gif")
General Trends:
The boxplots show clear seasonality:
Winter (Jan–Feb, Dec) — lowest arrests, consistently around ~200–250.
Spring (Mar–May) — arrests start increasing, reaching ~300–350.
Summer (Jun–Aug) — peaks, especially July & August, with arrests reaching ~400–500.
Fall (Sep–Nov) — begins to decline from summer peaks, stabilizing around ~300.
The seasonal pattern is most visible and consistent in the earlier years (2010–2016).
# Check why some spring/summer months are absent from the plots: list every
# year-month combination that has no arrest records at all.

# Month labels in calendar order
all_months <- month.abb

# Every year seen in the data (including NA, if present) crossed with every
# month
all_combinations <- expand_grid(
  year = unique(Arrests$year),
  month = all_months
)

# Observed counts; the ordered month factor is converted to character so it
# can be joined against the character month labels above
actual_counts <- count(Arrests, year, month) %>%
  mutate(month = as.character(month))

# Keep every combination, treat unmatched ones as zero arrests, and order the
# rows chronologically
complete_counts <- left_join(
  all_combinations,
  actual_counts,
  by = c("year", "month")
) %>%
  mutate(n = replace_na(n, 0)) %>%
  arrange(year, match(month, all_months))

# Combinations without any arrests
missing_months <- filter(complete_counts, n == 0)
print(missing_months)
## # A tibble: 20 × 3
## year month n
## <dbl> <chr> <int>
## 1 2021 Apr 0
## 2 2021 May 0
## 3 2021 Jun 0
## 4 2021 Jul 0
## 5 2021 Aug 0
## 6 2021 Sep 0
## 7 2024 May 0
## 8 2024 Jun 0
## 9 NA Jan 0
## 10 NA Feb 0
## 11 NA Mar 0
## 12 NA Apr 0
## 13 NA May 0
## 14 NA Jun 0
## 15 NA Jul 0
## 16 NA Aug 0
## 17 NA Sep 0
## 18 NA Oct 0
## 19 NA Nov 0
## 20 NA Dec 0
Seasonal barplot
# Grouped bar chart: arrests per season, side by side within each year.
# Records without a season (missing date) are excluded.
Arrests %>%
  filter(!is.na(season)) %>%
  count(year, season) %>%
  ggplot(aes(x = factor(year), y = n, fill = season)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = season_colors) +
  theme_minimal() +
  labs(
    title = "Arrests by Season and Year",
    x = "Year",
    y = "Number of Arrests",
    fill = "Season"
  )
General Observations:
1. In all years, Summer shows the highest number of arrests, peaking around ~1,200 in 2010–2016.
2. Fall and Spring are similar to each other, generally slightly below Summer but still high.
3. Winter consistently has the lowest number of arrests each year — roughly 50–70% of Summer.
Trend Over Time:
Arrest numbers declined steadily from ~2015 to 2019 across all seasons.
- Summer arrests dropped from ~1,100 in 2014 to ~600–700 by 2019.
- Fall and Spring show a similar downward trend.
- Winter remained the lowest but also declined.
Impact of COVID-19:
In 2020–2021, a dramatic drop is visible in all seasons:
Winter remains low.
Summer and Fall drop sharply, to about ~300 or lower — nearly ¼ of the pre-pandemic peak.
Spring also falls dramatically.
This aligns with what we observed in earlier plots: the pandemic significantly reduced arrests.
Post-2021 (the COVID-19 public health emergency officially ended in May 2023):
In 2022–2024, some recovery is observed:
- Slight uptick in Spring and Fall arrests.
- Summer remains much lower than its pre-2020 highs.
- Overall, the seasonal differences are less pronounced in recent years than before 2020.
Seasonal Patterns throughout the years:
Summer > Fall ≈ Spring > Winter
Calendar Heatmap
# Number of arrests on each calendar day (records without a date are skipped)
daily_arrests <- count(filter(Arrests, !is.na(ArrestDate)), ArrestDate) %>%
  mutate(year = lubridate::year(ArrestDate))

# Years to render, in ascending order
years <- sort(unique(daily_arrests$year))

# Build one calendar heatmap per year. The ggsave() call is commented out, so
# nothing is written to disk here; after the loop `p` holds the last year's
# plot.
for (yr in years) {
  cat("Rendering year:", yr, "\n")

  p <- ggplot_calendar_heatmap(
    filter(daily_arrests, year == yr),
    "ArrestDate",
    "n"
  ) +
    scale_fill_gradient(
      low = "white",
      high = "red",
      name = "Arrests",
      na.value = "grey"
    ) +
    labs(
      title = sprintf("Calendar Heatmap of Daily Arrests — Year: %s", yr),
      subtitle = "Darker colors indicate more arrests, grey = missing data"
    ) +
    theme_minimal(base_size = 14)

  # ggsave(sprintf("calendar_%s.png", yr), plot = p, width = 12, height = 8)
}
## Rendering year: 2010
## Rendering year: 2011
## Rendering year: 2012
## Rendering year: 2013
## Rendering year: 2014
## Rendering year: 2015
## Rendering year: 2016
## Rendering year: 2017
## Rendering year: 2018
## Rendering year: 2019
## Rendering year: 2020
## Rendering year: 2021
## Rendering year: 2022
## Rendering year: 2023
## Rendering year: 2024
# Collect the previously saved per-year calendar PNGs in alphabetical order
imgs <- sort(
  list.files(
    path = "heat map calendar for each year",
    pattern = "\\.png$",
    full.names = TRUE
  )
)
# Number of frames found
length(imgs)
## [1] 15
# Stitch the yearly calendar heatmaps into a looping GIF, 2 seconds per frame
output_gif <- "calendar_heatmap_animation.gif"
gifski(
  png_files = imgs,
  gif_file = output_gif,
  width = 1200,
  height = 800,
  delay = 2,
  loop = TRUE
)
## [1] "/Users/xuziqing/Library/CloudStorage/OneDrive-UniversityofNorthCarolinaatChapelHill/STOR 320/FINAL project/calendar_heatmap_animation.gif"
# Add capitalised Date / Month / Year columns derived from Occur_Date and keep
# only incidents that have both a latitude and a longitude.
Incidents <- Incidents %>%
  mutate(
    Date = as.Date(Occur_Date),
    Month = month(Date, label = TRUE),
    Year = year(Date)
  ) %>%
  filter(!is.na(Latitude), !is.na(Longitude))
# Newer cleaned arrest dataset (pulled from GitHub); it already contains the
# derived date columns
ArrestsE <- read_csv(file = "Police_Arrests_Clean2.csv")
## Rows: 37309 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): Charge, Street, City, State, Race, Gender, Ethnicity, Arrest_Type...
## dbl (8): Incident_Id, Zip, Age, latitude, longitude, Object_Id, year, mont...
## dttm (1): Arrest_Date
## date (1): ArrestDate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Chapel Hill boundary as a data frame of coordinates, used to draw the town
# outline on maps. places() fetches place geometries for North Carolina; the
# nested [[1]] indexing pulls a single coordinate matrix out of the geometry,
# which as.data.frame() turns into columns V1 and V2.
# NOTE(review): presumably V1 = longitude and V2 = latitude of the first ring
# of the first polygon — confirm against the geometry structure.
ch <- as.data.frame(
  filter(places(state = "NC"), NAME == "Chapel Hill")$geometry[[1]][[1]][[1]]
)
## Retrieving data for the year 2024
## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 
48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== 
| 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | 
|================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
# Column names of the newer arrest dataset
colnames(ArrestsE)
## [1] "Incident_Id" "Charge" "Street" "City"
## [5] "State" "Zip" "Arrest_Date" "Age"
## [9] "Race" "Gender" "Ethnicity" "Arrest_Type"
## [13] "Drugs_Alcohol" "Weapon" "Disposition" "latitude"
## [17] "longitude" "Object_Id" "ArrestDate" "year"
## [21] "month_num" "month" "season"
library(dplyr)
library(ggplot2)
# Make sure the output folder for the 2010 maps exists
if (!dir.exists("maps_2010")) {
  dir.create("maps_2010")
}

# Build one arrest-density map per month of 2010. The ggsave() call is
# commented out, so each plot is only built in memory as `p`.
for (m in seq_len(12)) {
  month_name <- month.abb[m] # "Jan", "Feb", etc.

  # Snap coordinates to a 0.005-degree grid and count arrests per grid cell
  freq_grid <- Arrests %>%
    filter(year == 2010, month == month_name) %>%
    mutate(
      rlat = round(latitude * 200) / 200,
      rlon = round(longitude * 200) / 200
    ) %>%
    count(rlat, rlon)

  # Nothing to draw for months without arrests
  if (nrow(freq_grid) == 0) next

  p <- ggplot() +
    geom_tile(data = freq_grid, aes(x = rlon, y = rlat, fill = n)) +
    # town boundary outline
    geom_path(data = ch, aes(x = V1, y = V2), color = "black") +
    coord_quickmap(xlim = c(-79.09, -78.99), ylim = c(35.86, 35.99)) +
    labs(
      title = "Arrest Frequency in Chapel Hill",
      subtitle = sprintf("2010 - Month: %s", month_name),
      fill = "Arrest Count",
      x = "Longitude",
      y = "Latitude"
    ) +
    scale_fill_viridis_c() +
    theme_minimal()

  # ggsave(filename = sprintf("maps_2010/arrests_2010_%s.png", month_name),plot = p, width = 8, height = 6)
}
# Collect every monthly map PNG under "yearly_maps" (including subfolders).
# Files are assumed to be named arrests_YYYY_MMM.png.
pngs <- list.files(
  path = "yearly_maps",
  pattern = "\\.png$",
  recursive = TRUE,
  full.names = TRUE
)

# Fail early with a clear message instead of handing gifski an empty file list
if (length(pngs) == 0) {
  stop("No PNG files found under 'yearly_maps'", call. = FALSE)
}

# Order frames chronologically. A plain alphabetical sort would put "Apr"
# before "Jan", so the year and month are parsed out of each file name and
# used as sort keys instead.
month_order <- setNames(1:12, month.abb)
pngs_sorted <- tibble(path = pngs) %>%
  mutate(
    fname = basename(path),
    year = as.numeric(stringr::str_extract(fname, "\\d{4}")),
    month_str = stringr::str_extract(fname, paste(month.abb, collapse = "|")),
    month_num = month_order[month_str]
  ) %>%
  arrange(year, month_num) %>%
  pull(path)

# Stitch the ordered frames into a looping GIF, 2 seconds per frame
gifski(
  png_files = pngs_sorted,
  gif_file = "all_years_arrests.gif",
  width = 1200,
  height = 800,
  delay = 2, # seconds per frame
  loop = TRUE
)
## [1] "/Users/xuziqing/Library/CloudStorage/OneDrive-UniversityofNorthCarolinaatChapelHill/STOR 320/FINAL project/all_years_arrests.gif"